# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html


# useful for handling different item types with a single interface
from itemadapter import ItemAdapter

import dataset
from .utils.data_operate import base_dir
from .utils.logger import logger


class JobInformationPipeline:
    """Persist scraped job postings into a SQLite database via `dataset`.

    Items whose ``key_word`` field is the literal string ``"None"`` are
    skipped, and items whose ``url`` already exists in the table are
    skipped (URL-based deduplication).
    """

    def open_spider(self, spider):
        # Scrapy hook: open the SQLite connection when the spider starts.
        # Falls back to `base_dir` when SQLITE_DB_NAME is not configured.
        db_name = spider.settings.get('SQLITE_DB_NAME', base_dir)
        self.db_conn = dataset.connect('sqlite:///{}'.format(db_name),
                                       engine_kwargs={'connect_args': {'check_same_thread': True}},
                                       sqlite_wal_mode=False)
        self.information_table = self.db_conn['job_information']

    def process_item(self, item, spider):
        # Insert (or skip) the item, then commit so each item is durable.
        self.insert_job_information(item)
        self.db_conn.commit()
        return item

    def insert_job_information(self, item):
        """Insert one item, skipping keyword-less and duplicate entries."""
        logger.info('等待处理的数据 : ' + str(item))
        if item['key_word'] == "None":
            logger.info(item['title'] + "未匹配到关键词,不写入数据.")
            return
        # Single-row lookup instead of loading the entire url column on
        # every item (the original did a full-table scan per insert).
        if self.information_table.find_one(url=item['url']) is not None:
            logger.info(item['url'] + " 已存在,不再写入.")
            return
        values = dict(exam_type=item['exam_type'],
                      url=item['url'],
                      title=item['title'],
                      key_word=item['key_word'],
                      publish_at=item['publish_at'],
                      )
        logger.info("写入的数据是: " + str(values))
        self.information_table.insert(values)

    def close_spider(self, spider):
        # Scrapy hook: the original method was named `spider_close`, which
        # Scrapy never calls, so the connection was leaked on shutdown.
        self.db_conn.close()

    def spider_close(self):
        # Backward-compatible alias for any code calling the old name.
        self.db_conn.close()


class CommonSensePipeline:
    """No-op pipeline stage: forwards every item through unchanged."""

    def process_item(self, item, spider):
        """Return *item* untouched so downstream pipelines receive it."""
        return item
